base_dir <- getwd()
in_file <- paste0(base_dir, "/data/", roi, "_QUANT.tsv")
allmarkers_outfile <- paste0(base_dir, "/data/for_seurat/allmarkers_", roi, "_qcd_withids.csv")
filteredmarkers_outfile <- paste0(base_dir, "/data/for_seurat/", roi, "_qcd_filtered_markers_withids.csv")
cluster_outfile <- paste0(base_dir, "/data/cluster_output/", roi, "_clusters.csv")
# Metric for clustering
cluster_metric <- "Cell: Median"
# QC parameters
sigsum_quantile_high <- 0.99
sigsum_quantile_low <- 0.05
bin_size <- 50
density_cutoff <- 5
# Clustering parameters
# markers_selected <- c("BCL2", "FOXP3", "LDHA", "CD11b", "CD20", "HLADR", "ICOS", ### Full quality panel
# "CD31", "CD4", "CD14", "CD45RO", "CD107a", "CD68", "CD57",
# "CD3e", "Ki67", "CD38", "CD123", "CD11c", "CD8", "PD1",
# "CD44", "PDL1", "CD45")
markers_selected <- c("BCL2", "FOXP3", "CD11b", "CD20", "ICOS", ### Remove functional markers
"CD31", "CD4", "CD14", "CD45RO", "CD107a",
"CD68", "CD57", "CD3e", "CD38", "CD11c", "CD8",
"CD44", "CD45")
# clustering_res <- 1.5
min_clusters <- 6
There are two main quality control steps:
# Read in data
roi_df <- read.table(in_file, sep = "\t", header = T, check.names = FALSE)
# Reformat column names
colnames(roi_df) <- sub(": ", ": Cell: ", colnames(roi_df)) # Insert "Cell:" into the column names for the marker genes for Seurat
colnames(roi_df) <- sub("\u00B5", "u", colnames(roi_df)) # replace mu with u
# Extract the means for each marker
means_only_df <- roi_df %>% select(contains("Cell: Mean"))
# Get sum of all signals for each cell
means_only_df$sigsum <- rowSums(means_only_df)
# Calculate percentile cutoffs
sigsum_cutpoint_1 <- quantile(means_only_df$sigsum, sigsum_quantile_high)
sigsum_cutpoint_2 <- quantile(means_only_df$sigsum, sigsum_quantile_low)
print(paste("High sigsum:", sigsum_cutpoint_1, "Low sigsum:", sigsum_cutpoint_2))
## [1] "High sigsum: 89213.256294 Low sigsum: 25286.09846"
# Plot distribution of sigsums with cutoffs
ggplot(data = means_only_df, aes(x = sigsum)) +
geom_histogram() +
theme_bw() +
geom_vline(aes(xintercept = sigsum_cutpoint_1)) +
geom_vline(aes(xintercept = sigsum_cutpoint_2))
# Add metric to df
roi_df$sigsum_metric = "Cell"
roi_df$sigsum_metric[means_only_df$sigsum > sigsum_cutpoint_1] <- "SIGSUM High"
roi_df$sigsum_metric[means_only_df$sigsum < sigsum_cutpoint_2] <- "SIGSUM Low"
roi_df$sigsum <- means_only_df$sigsum
print("Sigsum metrics:")
## [1] "Sigsum metrics:"
table(roi_df$sigsum_metric, useNA = "ifany")
##
## Cell SIGSUM High SIGSUM Low
## 45440 484 2418
# Calculate bins for cell density
roi_df$binX <- floor(roi_df$`Centroid X um`/bin_size)
roi_df$binY <- floor(roi_df$`Centroid Y um`/bin_size)
# Count cells in each bin
bincounts <- roi_df %>% count(binX, binY) %>% rename(bin_density = n)
# Add bin density metric to df
roi_df <- roi_df %>% inner_join(bincounts) %>% mutate(low_bin_density = bin_density <= density_cutoff)
top = min(roi_df$`Centroid Y um`) + max(roi_df$`Centroid Y um`)
roi_df$invertY = top - roi_df$`Centroid Y um`
# Plot cells with low density
ggplot(roi_df, aes(x = `Centroid X um`, y = invertY, fill = low_bin_density)) + ## Fix this to plot correctly
geom_point(pch=21,colour="white") +
ylab("Centroid Y um") +
theme_minimal()
# Filter on sigsum
roi_df <- roi_df %>% filter((sigsum < sigsum_cutpoint_1) & (sigsum > sigsum_cutpoint_2))
# Filter out low bin density
roi_df <- roi_df %>% filter(bin_density > density_cutoff)
roi_df$cellid <- paste0(roi, "_", 1:nrow(roi_df))
roi_df <- roi_df %>% select(c("cellid", colnames(roi_df)[1:ncol(roi_df) - 1]))
cols_keep <- c("cellid", "Image", "Object ID", "Name", "Class", "Parent",
"ROI", "Centroid X um", "Centroid Y um", "Area um^2", "Length um",
"Circularity", "Solidity", "Max diameter um", "Min diameter um",
"sigsum_metric", "sigsum",
"binX", "binY", "bin_density", "low_bin_density")
roi_df <- roi_df %>% select(all_of(cols_keep), contains("Cell:"))
fwrite(roi_df, allmarkers_outfile)
roi_df <- roi_df %>% select(all_of(cols_keep), contains(paste0(markers_selected, ": Cell")))
# Save filtered df to read into Seurat
fwrite(roi_df, filteredmarkers_outfile)
codex.obj <- LoadAkoyaCustom(filename = filteredmarkers_outfile, type = "qupath", fov = roi)
codex.obj <- normalize_and_reduce(codex.obj)
Clustree is used to help determine what clustering resolution should be used. Resolutions producing stable clusters (clusters which maintain more of the same cells across resolutions) are desirable.
codex.obj <- FindNeighbors(object = codex.obj, dims = c(1:length(markers_selected)), verbose = FALSE)
codex.obj <- FindClusters(object = codex.obj, verbose = FALSE, resolution = seq(0.1,1.1,0.2), n.start = 1)
tree <- clustree(codex.obj, prefix = "Akoya_snn_res.")
plot(tree)
treedata <- tree$data
tree_summary <- treedata %>% group_by(Akoya_snn_res.) %>% summarise(sc3_sd = min(sc3_stability) + sd(sc3_stability), n_cluster = n()) %>% filter(n_cluster > min_clusters)
clustering_res <- as.numeric(as.character(tree_summary$Akoya_snn_res.[which.max(tree_summary$sc3_sd)]))
if(length(clustering_res) > 1) clustering_res <- clustering_res[1]
final_nclust <- tree_summary$n_cluster[which.max(tree_summary$sc3_sd)]
treedata$`Selected resolution` <- treedata$Akoya_snn_res. == clustering_res
tree_summary <- treedata %>% group_by(Akoya_snn_res.) %>% summarise(sc3_sd = min(sc3_stability) + sd(sc3_stability), n_cluster = n()) %>% select(Resolution = Akoya_snn_res., `# clusters` = n_cluster)
kable(tree_summary, align = "r")
| Resolution | # clusters |
|---|---|
| 0.1 | 6 |
| 0.3 | 6 |
| 0.5 | 11 |
| 0.7 | 14 |
| 0.9 | 15 |
| 1.1 | 18 |
ggplot(treedata, aes(x = Akoya_snn_res., y = size, fill = `Selected resolution`)) +
geom_boxplot() +
geom_jitter(width = 0.2) +
theme_bw() +
xlab("Resolution") +
ylab("Cells per cluster") +
ggtitle("Distribution of cluster sizes per resolution")
Multiply resolution by 2 to increase the number of clusters calculated
clustering_res <- clustering_res * 2
codex.obj <- cluster_seurat(codex.obj, res = clustering_res)
final_nclust <- length(table(codex.obj$seurat_clusters))
clusters_out <- codex.obj@meta.data %>% select(seurat_clusters)
clusters_out <- cbind(clusters_out, GetTissueCoordinates(codex.obj@images[[roi]]))
clusters_out$seurat_clusters <- paste0("cluster_", clusters_out$seurat_clusters)
fwrite(clusters_out, cluster_outfile)
p1 <- DimPlot(codex.obj, label = T) + NoLegend()
p2 <- ImageDimPlot(codex.obj)
p1 + p2
plot_avg_heatmap <- function(codex.obj, order_manual = FALSE, col_ord = NULL, row_ord = NULL, scale_by = "row") {
mat <- codex.obj@assays$Akoya@scale.data
mat <- t(mat) %>% as.data.frame()
mat$cluster <- codex.obj$seurat_clusters
# Get means for each cluster
smSub <- mat %>%
group_by(cluster) %>%
summarise_all(mean, na.rm = TRUE) %>%
mutate_all(funs(replace(., is.na(.), 0))) %>%
ungroup()
# Get number of cells per cluster for annotation
annoBarValues <- as.data.frame(table(mat$cluster))$Freq
# Create matrix to be used in the heatmap
if (scale_by == "row") {
mat2 <- smSub %>%
select(-c(cluster)) %>% replace(is.na(.), 0) %>%
as.matrix() %>% t() %>% pheatmap:::scale_rows()
} else if (scale_by == "column") {
mat2 <- smSub %>%
select(-c(cluster)) %>% replace(is.na(.), 0) %>%
as.matrix() %>% pheatmap:::scale_rows() %>% t()
} else print("Select a valid argument for scale_by: row or column.")
## Annotation for cluster
ha = HeatmapAnnotation(Cluster = smSub$cluster,
ClusterID = anno_text(smSub$cluster, gp = gpar(fontsize = 12)))
# Create barplot annotation for cluster size for bottom of heatmap
ba = HeatmapAnnotation(CellCount = anno_barplot(annoBarValues,height=unit(2, "cm")))
mat2[is.nan(mat2)] <- 0
colnames(mat2) <- smSub$cluster
col_fun = colorRamp2(c(-2, -0.5, 2), c("white", "#BAFBD8", "#004C23"))
if (order_manual) {
if (!is.null(row_ord)) {
Heatmap(mat2,
row_names_gp = gpar(fontsize = 13),
top_annotation = ha,
bottom_annotation = ba,
column_order = col_ord,
row_order = row_ord,
border = TRUE)
} else {
Heatmap(mat2,
row_names_gp = gpar(fontsize = 13),
top_annotation = ha,
bottom_annotation = ba,
column_order = col_ord,
border = TRUE)
}
} else {
Heatmap(mat2,
row_names_gp = gpar(fontsize = 13),
top_annotation = ha,
bottom_annotation = ba,
border = TRUE)
}
}
Top heatmaps show average marker intensity per cluster, bottom heatmaps show marker intensity on a single-cell level. Change tabs to see plots for all available markers.
Average heatmaps are scaled by column (cluster)
Scaled by column (cluster)
## Get row and column orders for subsequent heatmaps
ht_median <- plot_avg_heatmap(codex.obj, scale_by = "column")
ht_median <- draw(ht_median)
col_ord <- unlist(column_order(ht_median))
row_ord <- unlist(row_order(ht_median))
# png(paste0("plots/01-", roi, "_heatmap_12marker.png"), height = 6, width = 10, units = "in", res = 300)
# draw(ht_median)
# dev.off()
Scaled by row (channel)
ht_median_rowscale <- plot_avg_heatmap(codex.obj, scale_by = "row", col_ord = col_ord, row_ord = row_ord, order_manual = T)
ht_median_rowscale <- draw(ht_median_rowscale)
# DoHeatmap(codex.obj) # +
# theme(axis.text.y = element_text(size = 12), title = element_text(size = 20))
RidgePlot(codex.obj, features = markers_selected, ncol = 3)
Scaled by column (cluster)
codex.obj_allmarkers <- LoadAkoyaCustom(filename = allmarkers_outfile, type = "qupath", fov = roi)
if (all(codex.obj$cellid == codex.obj_allmarkers$cellid)) {
codex.obj_allmarkers$seurat_clusters <- codex.obj$seurat_clusters
Idents(codex.obj_allmarkers) <- codex.obj_allmarkers$seurat_clusters
}
codex.obj_allmarkers <- suppressMessages(NormalizeData(object = codex.obj_allmarkers, normalization.method = "CLR", margin = 2))
codex.obj_allmarkers <- suppressMessages(ScaleData(codex.obj_allmarkers))
VariableFeatures(codex.obj_allmarkers) <- rownames(codex.obj_allmarkers)
ht_all <- plot_avg_heatmap(codex.obj_allmarkers, order_manual = T, col_ord = col_ord, scale_by = "column")
ht_all <- draw(ht_all)
row_ord_all <- row_order(ht_all)
# png(paste0("plots/01-", roi, "_heatmap_20marker.png"), height = 6, width = 10, units = "in", res = 300)
# draw(ht_all)
# dev.off()
# DoHeatmap(codex.obj_allmarkers) +
# theme(axis.text.y = element_text(size = 12), title = element_text(size = 20))
Scaled by row (channel)
ht_all_roword <- plot_avg_heatmap(codex.obj_allmarkers, order_manual = T, col_ord = col_ord, row_ord = row_ord_all, scale_by = "row")
ht_all_roword <- draw(ht_all_roword)
RidgePlot(codex.obj_allmarkers, features = codex.obj_allmarkers@assays$Akoya@var.features, ncol = 3)